{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Computing native contacts with MDTraj\n",
    "\n",
    "Using the definition from Best, Hummer, and Eaton, \"Native contacts determine protein folding mechanisms in atomistic simulations\" PNAS (2013) [10.1073/pnas.1311599110](http://dx.doi.org/10.1073/pnas.1311599110)\n",
    "\n",
    "Eq. (1) of the SI defines the expression for the fraction of native contacts, $Q(X)$:\n",
    "\n",
    "$$\n",
    "Q(X) = \\frac{1}{|S|} \\sum_{(i,j) \\in S} \\frac{1}{1 + \\exp[\\beta(r_{ij}(X) - \\lambda r_{ij}^0)]},\n",
    "$$\n",
    "\n",
    "where\n",
    " - $X$ is a conformation,\n",
    " - $r_{ij}(X)$ is the distance between atoms $i$ and $j$ in conformation $X$,\n",
    " - $r^0_{ij}$ is the distance from heavy atom i to j in the native state conformation,\n",
    " - $S$ is the set of all pairs of heavy atoms $(i,j)$ belonging to residues $\\theta_i$ and $\\theta_j$ such that $|\\theta_i - \\theta_j| > 3$ and $r^0_{i,} < 4.5 \\unicode{x212B}$,\n",
    " - $\\beta=5 \\unicode{x212B}^{-1}$,\n",
    " - $\\lambda=1.8$ for all-atom simulations"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import mdtraj as md\n",
    "from itertools import combinations\n",
    "\n",
    "def best_hummer_q(traj, native):\n",
    "    \"\"\"Compute the fraction of native contacts according the definition from\n",
    "    Best, Hummer and Eaton [1]\n",
    "    \n",
    "    Parameters\n",
    "    ----------\n",
    "    traj : md.Trajectory\n",
    "        The trajectory to do the computation for\n",
    "    native : md.Trajectory\n",
    "        The 'native state'. This can be an entire trajecory, or just a single frame.\n",
    "        Only the first conformation is used\n",
    "        \n",
    "    Returns\n",
    "    -------\n",
    "    q : np.array, shape=(len(traj),)\n",
    "        The fraction of native contacts in each frame of `traj`\n",
    "        \n",
    "    References\n",
    "    ----------\n",
    "    ..[1] Best, Hummer, and Eaton, \"Native contacts determine protein folding\n",
    "          mechanisms in atomistic simulations\" PNAS (2013)\n",
    "    \"\"\"\n",
    "    \n",
    "    BETA_CONST = 50  # 1/nm\n",
    "    LAMBDA_CONST = 1.8\n",
    "    NATIVE_CUTOFF = 0.45  # nanometers\n",
    "    \n",
    "    # get the indices of all of the heavy atoms\n",
    "    heavy = native.topology.select_atom_indices('heavy')\n",
    "    # get the pairs of heavy atoms which are farther than 3\n",
    "    # residues apart\n",
    "    heavy_pairs = np.array(\n",
    "        [(i,j) for (i,j) in combinations(heavy, 2)\n",
    "            if abs(native.topology.atom(i).residue.index - \\\n",
    "                   native.topology.atom(j).residue.index) > 3])\n",
    "    \n",
    "    # compute the distances between these pairs in the native state\n",
    "    heavy_pairs_distances = md.compute_distances(native[0], heavy_pairs)[0]\n",
    "    # and get the pairs s.t. the distance is less than NATIVE_CUTOFF\n",
    "    native_contacts = heavy_pairs[heavy_pairs_distances < NATIVE_CUTOFF]\n",
    "    print(\"Number of native contacts\", len(native_contacts))\n",
    "    \n",
    "    # now compute these distances for the whole trajectory\n",
    "    r = md.compute_distances(traj, native_contacts)\n",
    "    # and recompute them for just the native state\n",
    "    r0 = md.compute_distances(native[0], native_contacts)\n",
    "    \n",
    "    q = np.mean(1.0 / (1 + np.exp(BETA_CONST * (r - LAMBDA_CONST * r0))), axis=1)\n",
    "    return q  "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# pull a random protein from the PDB\n",
    "# (The unitcell info happens to be wrong)\n",
    "traj = md.load_pdb('http://www.rcsb.org/pdb/files/2MI7.pdb')\n",
    "\n",
    "# just for example, use the first frame as the 'native' conformation\n",
    "q = best_hummer_q(traj, traj[0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "%matplotlib inline\n",
    "import matplotlib.pyplot as plt\n",
    "plt.plot(q)\n",
    "plt.xlabel('Frame', fontsize=14)\n",
    "plt.ylabel('Q(X)', fontsize=14)\n",
    "plt.show()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
